import scrapy
from scrapyDemo.items import ScrapydemoItem
from scrapyDemo.logg import logger
import datetime
import re
import emoji
class BliblihotlistSpider(scrapy.Spider):
    """Scrape the Bilibili popular-ranking page of every configured category.

    One request is issued per entry in ``rank_categories``; every ``<li>``
    under the ``rank-list`` element of a response becomes one
    ``ScrapydemoItem``.
    """

    name = 'blibliHotList'

    # Category slugs appended to the ranking URL, one request per slug.
    rank_categories = (
        'guochuang', 'douga', 'music', 'dance', 'game', 'knowledge',
        'tech', 'sports', 'car', 'life', 'food', 'animal', 'kichiku',
        'fashion', 'ent', 'cinephile', 'origin', 'rookie',
    )

    # (unit suffix, multiplier) pairs, tried in this order by wan_in_str.
    _COUNT_UNITS = (('万', 10000), ('w', 10000), ('亿', 100000000))

    # A doubled backslash followed by ``x`` and two alphanumerics, i.e. the
    # ``\xHH`` sequences left in a title after its backslashes were doubled.
    _HEX_ESCAPE_RE = re.compile(r'(\\\\x[A-Za-z0-9][A-Za-z0-9])')

    def start_requests(self):
        """Yield one ranking-page request per category slug."""
        for category in self.rank_categories:
            url = 'https://www.bilibili.com/v/popular/rank/%s' % category
            yield scrapy.Request(url=url, callback=self.parse)

    def wan_in_str(self, string):
        """Convert a count such as ``'1.2万'``, ``'3w'`` or ``'1.5亿'`` to int.

        Args:
            string: An int/float, a str, ``None``, or a list/tuple whose
                first element is one of those (only the first element is
                used).

        Returns:
            The integer count; ``0`` for ``None``, an empty string or an
            empty sequence; ``None`` when the value cannot be converted
            (the failure is logged).
        """
        value = string
        try:
            if isinstance(value, (list, tuple)):
                if not value:
                    return 0
                value = value[0]
            if value is None:
                return 0
            if isinstance(value, (int, float)):
                return int(value)

            text = value.strip()
            if not text:
                return 0
            for suffix, factor in self._COUNT_UNITS:
                if suffix in text:
                    number = float(text.split(suffix)[0])
                    # round() instead of plain int(): float error must not
                    # truncate the product one unit too low.
                    return int(round(number * factor))
            return int(text)
        except (AttributeError, TypeError, ValueError, OverflowError):
            logger.error('万字转换失败。。。{}:{}'.format(string, type(string)))
            return None

    @staticmethod
    def _strip_emoji(text):
        """Return ``text`` with emoji characters removed."""
        # emoji >= 2.0 removed get_emoji_regexp(); prefer replace_emoji()
        # when the installed version provides it.
        if hasattr(emoji, 'replace_emoji'):
            return emoji.replace_emoji(text, replace='')
        return re.sub(emoji.get_emoji_regexp(), r"", text)

    def _clean_title(self, raw_title):
        """Drop quotes, spaces, ``\\xHH`` escapes and emoji from a title."""
        title = raw_title.replace('"', "").replace('\\', "\\\\").replace(' ', "")
        title = self._HEX_ESCAPE_RE.sub('', title)
        return self._strip_emoji(title)

    def _count_at(self, row, xpath):
        """Return the numeric count found at ``xpath`` in ``row``, or 0 if absent."""
        raw = row.xpath(xpath).get()
        if raw is None:
            return 0
        return self.wan_in_str(raw.replace('\n', "").replace(' ', ""))

    def _build_item(self, row):
        """Build a fresh item from one ``<li>`` of the ranking list.

        Raises:
            AttributeError: If a mandatory node (id link, title, uploader
                name, video link) is missing, because ``.get()`` then
                returns ``None``.
        """
        item = ScrapydemoItem()
        item['hotID'] = row.xpath('./div/div[2]/div/a/@href').get().split('/')[-1]
        item['hotTitle'] = self._clean_title(row.xpath('./div/div[2]/a/text()').get())
        item['hotName'] = (
            row.xpath('./div/div[2]/div/a/span/text()').get()
            .replace('\n', "").replace(' ', "")
        )
        # Strips the protocol-relative "//" prefix (and any other "//").
        item['hotUrl'] = row.xpath('./div/div[2]/a/@href').get().replace('//', "")
        # First and second stats <span> of the row respectively.
        item['hotVideo'] = self._count_at(row, './div/div[2]/div/div/span[1]/text()')
        item['hotComment'] = self._count_at(row, './div/div[2]/div/div/span[2]/text()')
        return item

    def parse(self, response):
        """Yield one ``ScrapydemoItem`` per ranking row of ``response``.

        A row that cannot be parsed is logged and skipped so that it does
        not discard the remaining rows of the page.
        """
        rows = response.xpath('//*[@class="rank-list"]/li')
        # Identical for every row of the page, so computed once.
        crawl_date = datetime.date.today()
        video_type = str(response.url).split('/')[-1]

        for row in rows:
            try:
                # A new item per row: reusing a single instance would make
                # every yielded item refer to the same mutable object.
                item = self._build_item(row)
            except Exception as e:
                logger.error(f"{response.url}访问页面失败：{e}")
                continue
            item['hotTime'] = crawl_date
            item['hotType'] = video_type
            yield item






